"""
@Author: Bright
@File: recursively_split.py
@Time: 2025/9/28
@Desc: 递归分割
pip install -qU langchain-text-splitters
"""

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Demo: split a text file into overlapping chunks with
# RecursiveCharacterTextSplitter and preview the first chunks.

# Path of the sample text; being relative, it is resolved against the
# current working directory at run time.
file_path = "../resource/knowledge.txt"
with open(file_path, encoding="UTF-8") as f:
    state_of_the_union = f.read()  # full raw text of the file

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,  # Deliberately small chunk size, for demonstration only.
    chunk_overlap=20,  # Target overlap between adjacent chunks; reduces context loss at boundaries.
    length_function=len,  # Chunk size is measured with len(), i.e. in characters.
    # Whether the separators (default list: ["\n\n", "\n", " ", ""]) should be
    # interpreted as regular expressions; False treats them as literal strings.
    is_separator_regex=False,
)
# create_documents() expects a list of texts, so the single string is wrapped in [].
texts = text_splitter.create_documents([state_of_the_union])

# Preview at most the first two chunks, separated by a dashed line.
# Slicing (instead of indexing texts[0] / texts[1] directly) avoids an
# IndexError when the input is empty or short enough to fit in one chunk.
# To dump every chunk instead, iterate over the whole `texts` list.
separator = "--" * 50
for index, chunk in enumerate(texts[:2]):
    if index:
        print(separator)
    print(chunk)
